# SPDX-FileCopyrightText: 2018 dargen3
#
# SPDX-License-Identifier: AGPL-3.0-or-later

# -*- coding: utf-8 -*-
from html.parser import HTMLParser

from intelmq.lib import utils
from intelmq.lib.bot import ParserBot
import intelmq.lib.harmonization as harmonization


class MyHTMLParser(HTMLParser):
    """HTML parser that only remembers the most recent items it has seen.

    After each ``feed()`` call the attributes hold:

    * ``lsTag``   -- name of the last start tag handled,
    * ``lsValue`` -- last ``value`` attribute found on any start tag
      (kept unchanged when a tag has no such attribute),
    * ``lsData``  -- last chunk of text content handled.
    """

    # Class-level defaults; the handlers below overwrite them per instance.
    lsTag = ""
    lsValue = ""
    lsData = ""

    def handle_data(self, data):
        """Store the latest text chunk, replacing the previous one."""
        self.lsData = data

    def handle_starttag(self, tag, attrs):
        """Store the tag name and, when present, its ``value`` attribute."""
        self.lsTag = tag
        found_values = [content for name, content in attrs if name == "value"]
        if found_values:
            # With repeated ``value`` attributes the last occurrence wins.
            self.lsValue = found_values[-1]


# Module-level MyHTMLParser instance, created once when this module is imported.
parser = MyHTMLParser()


class MalwareurlParserBot(ParserBot):
    """Parse the MalwareURL feed.

    The report is an HTML page that is scanned line by line. Everything up
    to the text ``IPs`` is skipped; after that, ``<input>`` tags are read in
    pairs until a line starting with ``</TBODY>`` is reached. The ``value``
    of the first tag of a pair is treated as the host name, the ``value`` of
    the second one as the IP address, and one event is sent per pair.
    """

    def process(self):
        """Turn one received report into events and acknowledge it.

        Each event gets ``source.fqdn`` and ``source.ip`` (only when the
        respective value passes harmonization), ``raw`` (the two source
        lines concatenated) and ``classification.type`` set to ``phishing``.

        Raises:
            ValueError: if the report never yields the text ``IPs``, i.e.
                the expected table is missing (this includes an empty
                report).
        """
        report = self.receive_message()
        raw_report = utils.base64_decode(report["raw"])
        # As before, the very first line of the report is never fed to the
        # HTML parser; scanning starts with the second line.
        rows = (row.strip() for row in raw_report.splitlines()[1:])

        # A fresh parser per report: a shared instance would carry lsData,
        # lsTag, lsValue and buffered partial markup over from the previous
        # report (e.g. a stale "IPs" would skip the header search entirely).
        html_parser = MyHTMLParser()

        line = None
        for line in rows:
            html_parser.feed(line)
            if html_parser.lsData == "IPs":
                break
        else:
            raise ValueError("MalwareURL report does not contain the "
                             "'IPs' table header.")

        # First half of the current (host, IP) pair, stored together with
        # the line it came from; None while no host is waiting for its IP.
        pending = None
        while not line.startswith("</TBODY>"):
            line = next(rows, None)
            if line is None:
                # Truncated page: keep what was parsed instead of failing
                # with an IndexError after events have already been sent.
                self.logger.warning("MalwareURL report ended before the "
                                    "closing </TBODY> line was found.")
                break

            # Clear the tag marker so that a line without any start tag is
            # not mistaken for another <input> line left over from before.
            html_parser.lsTag = ""
            html_parser.feed(line)
            if html_parser.lsTag != "input":
                continue

            if pending is None:
                pending = (html_parser.lsValue, line)
                continue

            url, url_raw_line = pending
            pending = None
            ip = html_parser.lsValue

            event = self.new_event(report)
            if harmonization.FQDN.is_valid(url, sanitize=True):
                event.add("source.fqdn", url)
            if harmonization.IPAddress.is_valid(ip, sanitize=True):
                event.add("source.ip", ip)
            event.add("raw", url_raw_line + line)
            event.add("classification.type", "phishing")
            self.send_message(event)

        self.acknowledge_message()


# Module-level alias for the bot class (presumably the name the IntelMQ
# loader looks up in this module -- confirm against the framework).
BOT = MalwareurlParserBot
